##### analysis main models  ######

rm(list = ls())

library(tidyverse)
library(haven)
library(labelled)
library(data.table)
library(cregg)
library(nortest) # Anderson-Darling test for normality 
library(car) # various assumption tests
library(stargazer)
# Load conjoint data
dfconj <- read_csv("/Users/cbrugge/Desktop/CE Paper/w8_conjoint.csv") %>% 
  as.data.frame()


# marginal means and AMCEs ------------------------------------------------

dfconj$Price <- as.factor(dfconj$Price)
dfconj$`Functional duration` <- as.factor(dfconj$`Functional duration`)
dfconj$`Env. production impact` <- as.factor(dfconj$`Env. production impact`)
dfconj$`Energy efficiency` <- as.factor(dfconj$`Energy efficiency`)
dfconj$Recyclability <- as.factor(dfconj$Recyclability)
dfconj$Repairability <- as.factor(dfconj$Repairability)
dfconj$product_type <- as.factor(dfconj$product_type)


# reorder levels

dfconj$Price <- factor(dfconj$Price, 
                       levels = c("180 / 250 / 470 CHF", "550 / 850 / 890 CHF",
                                  "920 / 1900 / 1650 CHF", "1300 / 3100 / 3200 CHF"))

dfconj$`Functional duration` <- factor(dfconj$`Functional duration`, 
                                   levels = c("2 / 2 / 5 years", "4 / 8 / 10 years",
                                              "8 / 15 / 15 years"))

dfconj$`Env. production impact` <- factor(dfconj$`Env. production impact`, 
                                                      levels = c("Very low", "Medium",
                                                                 "Very high"))

dfconj$`Energy efficiency` <- factor(dfconj$`Energy efficiency`, 
                                     levels = c("G", "E",
                                                "C", "A"))

dfconj$Recyclability <- factor(dfconj$Recyclability, 
                               levels = c("Not recyclable", "Limited recyclable",
                                          "Recyclable"))

dfconj$Repairability <- factor(dfconj$Repairability, 
                               levels = c("Not repairable at all", "Repairable to a limited degree",
                                          "Repairable"))


# basic logistic regression -----------------------------------------------

f1_glm <- glm(choice ~ Price + `Functional duration` + `Env. production impact` +
  `Energy efficiency` + Recyclability + Repairability, data = dfconj, family = binomial)


# assumption checks -------------------------------------------------------

# independence of errors
check_autocorrelation(f1_glm)

#  multicollinearity
vif(f1_glm) # Based on the VIF results, there is no significant multicollinearity 
# convert VIF results to a data frame 

vif_values <- vif(f1_glm)
vif_interpreted <- vif_values[, 3]  

# create data frame for the VIF table
vif_table <- data.frame(Variable = rownames(vif_values), VIF = vif_interpreted)

# export
stargazer(vif_table, type = "html", summary = FALSE,
          out = "/Users/cbrugge/Desktop/CE Paper/Review (JCP)/Review 2/diagnostics plots/vif_table.html")


# influential outliers
# cook's distance for detecting influential points
cooksd <- cooks.distance(f1_glm)

# Plot Cook's distance
plot(cooksd, type = "h", main = "Cook's Distance", ylab = "Cook's distance")
abline(h = 4/(nrow(dfconj)-length(coef(f1_glm))-1), col = "red")

# Display influential points
influential_points <- which(cooksd > (4/(nrow(dfconj)-length(coef(f1_glm))-1)))
influential_points

djconj <- dfconj %>%
  mutate(dffits = dffits(f1_glm))

threshold <- 2 * sqrt(length(coef(f1_glm)) / nobs(f1_glm))

djconj <- djconj %>%
  mutate(obs_number = row_number(),
         large = ifelse(abs(dffits) > threshold, "red", "black"))

# plot
ggplot(djconj, aes(x = obs_number, y = dffits, color = large)) +
  geom_jitter(width = 0.3, height = 0, alpha = 0.6) +
  geom_hline(yintercept = c(-threshold, threshold), color = "red") +
  scale_color_identity() +
  labs(title = "Influential Outliers Based on DFFITS",
       x = "Observation Number",
       y = "DFFITS") +
  theme_minimal()

# re-order attributes 

desired_order <- c("Price", "Functional duration", "Env. production impact",
                   "Energy efficiency", "Recyclability", "Repairability")

f1 <- choice ~ Price + `Functional duration` + `Env. production impact` +
  `Energy efficiency` + Recyclability + Repairability

mm <- cj(dfconj, f1, id = ~ PubId, h0 = 0.5, estimate = "mm", feature_labels = c("Price", "Functional duration", "Env. production impact",
                   "Energy efficiency", "Recyclability", "Repairability"), family = binomial) 
# info: mm function from cregg package automatically includes robust clustered standard errors
        
mm$feature <- factor(mm$feature, 
                          levels = desired_order,
                          labels = c("Price", 
                                     "Functional\nduration", 
                                     "Environmental\nproduction\nimpact", 
                                     "Energy\nefficiency", 
                                     "Recyclability", 
                                     "Repairability"))

mm_test <- cj(dfconj, f1, id = ~ PubId, h0 = 0.5, estimate = "mm", feature_order = desired_order)

mm_test$feature <- factor(mm_test$feature, 
                     levels = desired_order,
                     labels = c("Price", 
                                "Functional\nduration", 
                                "Environmental\nproduction\nimpact", 
                                "Energy\nefficiency", 
                                "Recyclability", 
                                "Repairability"))

p1 <- plot(mm_test, 
           weights = w8_weights_raking,
           vline = 0.5, 
           xlab = "Marginal means", 
           xlim = c(0.3, 0.7),
           legend_title = "Attributes",
           feature_headers = F) 

p1 <- p1 + ggplot2::facet_wrap(~feature, ncol = 1L, scales = "free_y", 
                               strip.position = "left") + #labels = c("Price", "Functional\nduration", 
#"Environmental\nproduction\nimpact", "Energy\nefficiency", 
#"Repair\n-ability", "Recycl\n-ability")) +
  geom_point(size = 3.5) +
  geom_linerange(aes(xmin = lower, xmax = upper), size = 1, position = position_dodge(width = 0.75)) +
  ggplot2::theme_grey() +
  theme(
    axis.text.y = element_text(size = 20,color = "black", margin = margin(b = 40)),
    axis.title.y = element_text(size = 15, margin = margin(t = 25)),
    axis.text.x = element_text(size = 20),
    axis.title.x = element_text(size = 20, margin = margin(t = 15)),
    plot.title = element_text(size = 15, margin = margin(b = 20)),
    legend.position = "none",  # Remove legend
    strip.text = element_text(size=15, face ="bold")
    )

print(p1)

# AMCEs 

amce <- amce(dfconj, f1, id = ~ PubId, level_order = "descending")

p2 <- (plot(amce, feature_headers = FALSE,
            vline = 0.5,
            xlab = "AMCEs on a scale from -1 to 1",
            ylab = "Attribute levels",
            legend_title = "Attributes"))

p2 + ggplot2::facet_wrap(~feature, ncol = 1L,
                         scales = "free_y", strip.position = "left") +
  ggtitle("Preferences")


# plot by product type ----------------------------------------------------


mm_stacked <- cj(dfconj, choice ~ Price + `Functional duration` + `Env. production impact` +
                   `Energy efficiency` + Recyclability + Repairability, id = ~ PubId,
                 estimate = "mm", by = ~ product_type, level_order = "ascending", feature_order = desired_order)

mm_stacked$feature <- factor(mm_stacked$feature, 
                          levels = desired_order,
                          labels = c("Price", 
                                     "Functional\nduration", 
                                     "Environmental\nproduction\nimpact", 
                                     "Energy\nefficiency", 
                                     "Recyclability", 
                                     "Repairability"))

p3 <- ggplot(mm_stacked, aes(x = estimate, y = level, color = product_type, fill = product_type)) +
  facet_wrap(~feature, ncol = 1L, scales = "free_y", strip.position = "left") +
  geom_vline(xintercept = 0.5, color = "grey") +
  geom_point(aes(color = product_type, fill = product_type), size = 3.5, position = position_dodge(width = 0.75)) +
  geom_linerange(aes(xmin = lower, xmax = upper, color = product_type), size = 1, position = position_dodge(width = 0.75)) +
  labs(x = "Marginal means") +  # Rename x-axis title
  theme_grey() +  # You can use another theme if you prefer
  theme(
    axis.text.y = element_text(size = 20,color = "black", margin = margin(b = 40)),
    axis.title.y = element_blank(), 
    axis.text.x = element_text(size = 20),
    axis.title.x = element_text(size = 20, margin = margin(t = 15)),
    plot.title = element_text(size = 15, margin = margin(b = 20)),
    legend.title = element_blank(),  # Remove legend
    legend.text = element_text(size = 20, margin = margin(t = 15)),
    strip.text = element_text(size=15, face ="bold")
  )

print(p3)
